package cn.stronglink.crawler.common;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;

import org.apache.http.HttpEntity;
import org.apache.http.HttpStatus;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import com.google.common.base.Preconditions;

/**
 * Fetches a web page over HTTP and wraps the response body as an {@link HtmlPage}.
 *
 * @author yuzhantao
 *
 */
public class RequestAndResponseTool {
	private static final Logger logger = LogManager.getLogger(RequestAndResponseTool.class);

	/** Size in bytes of the scratch buffer used while draining the response body. */
	private static final int READ_BUFFER_SIZE = 8192;

	/**
	 * Sends an HTTP GET request to {@code url} and returns the response body wrapped in an
	 * {@link HtmlPage}.
	 *
	 * <p>A new HTTP client is created for each call and is closed, together with the response
	 * and its content stream, before this method returns. The body is kept as the raw bytes
	 * received from the server; no charset decoding is applied here.
	 *
	 * @param url the address to fetch
	 * @return the fetched page, or {@code null} when the response carries no entity
	 * @throws IllegalArgumentException if the response status code is not 200 (OK)
	 * @throws ClientProtocolException  if an HTTP protocol error occurs
	 * @throws IOException              if the connection fails or the body cannot be read
	 */
	public static HtmlPage sendRequstAndGetResponse(String url) throws ClientProtocolException, IOException {
		// Timeouts are in milliseconds.
		RequestConfig requestConfig = RequestConfig.custom()
				.setConnectTimeout(50000).setConnectionRequestTimeout(10000)
				.setSocketTimeout(50000).build();
		HttpGet httpGet = new HttpGet(url);
		httpGet.setConfig(requestConfig);

		// try-with-resources releases the response and the client (and its connection
		// manager) on every exit path, including when execute() itself throws.
		try (CloseableHttpClient client = HttpClients.createDefault();
				CloseableHttpResponse httpResponse = client.execute(httpGet)) {
			// Reject anything other than 200 OK.
			int statusCode = httpResponse.getStatusLine().getStatusCode();
			Preconditions.checkArgument(statusCode == HttpStatus.SC_OK, "网页返回状态码: " + statusCode);

			HttpEntity httpEntity = httpResponse.getEntity();
			if (httpEntity == null) {
				return null;
			}

			// The Content-Type header is optional, so guard against its absence.
			// NOTE(review): confirm HtmlPage tolerates a null content type.
			String contentType = httpEntity.getContentType() != null
					? httpEntity.getContentType().getValue()
					: null;

			// May be negative when the server does not announce a length (e.g. chunked
			// transfer), so it is only logged and never used to size a buffer.
			long contentLength = httpEntity.getContentLength();
			logger.info("获取网页数据长度 ContentLength={}", contentLength);

			try (InputStream is = httpEntity.getContent()) {
				return new HtmlPage(readAllBytes(is), url, contentType); // wrap as a page
			}
		}
	}

	/** Drains {@code in} to end-of-stream and returns every byte read, unmodified. */
	private static byte[] readAllBytes(InputStream in) throws IOException {
		ByteArrayOutputStream out = new ByteArrayOutputStream();
		byte[] buffer = new byte[READ_BUFFER_SIZE];
		int read;
		while ((read = in.read(buffer)) != -1) {
			out.write(buffer, 0, read);
		}
		return out.toByteArray();
	}
}
